package cn.ihoway.analysis;

/**
 * Text pre-processing helper that strips unwanted characters from a sentence.
 *
 * <p>Only character-level filtering is performed. No stop-word or modal-particle
 * removal is applied by this class.
 */
public class Filter {

    /**
     * Matches every single character that is NOT one of: an ASCII letter, the plus
     * sign, the ASCII space, or a character in the range U+4E00..U+9FA5.
     *
     * <p>Note that {@code +} sits inside the character class, so it is a literal plus
     * sign (kept in the output), not a quantifier. Compiled once here instead of on
     * every call, which is what {@code String.replaceAll} would do.
     */
    private static final java.util.regex.Pattern DISALLOWED_CHARS =
            java.util.regex.Pattern.compile("(?i)[^a-zA-Z+ \u4E00-\u9FA5]");

    /**
     * Removes symbols, digits and any other disallowed character from the sentence.
     *
     * <p>Characters that are kept: ASCII letters, {@code '+'}, the ASCII space, and
     * characters in the range U+4E00..U+9FA5. Everything else (digits, punctuation,
     * tabs, line breaks, full-width symbols, ...) is deleted. Spaces are not collapsed
     * or trimmed.
     *
     * @param sentence the text to clean; must not be {@code null}
     * @return the sentence with every disallowed character removed; may be empty
     * @throws NullPointerException if {@code sentence} is {@code null}
     */
    public static String filter(String sentence) {
        if (sentence == null) {
            throw new NullPointerException("sentence must not be null");
        }
        return DISALLOWED_CHARS.matcher(sentence).replaceAll("");
    }

}
